Large Event Analysis#

import pandas as pd
import numpy as np
import datetime as dt
# Path to the USGS catalogue export covering 1960-2023.
csv_file = "../datasets/All (1960-2023).csv"
# `time` is read as text so that it can be parsed explicitly afterwards.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which is what raises the mixed-type DtypeWarning.
# NOTE(review): lineterminator='\n' leaves a trailing '\r' on the last column
# (it shows up as `magSource\r` in the table output) — presumably the file has
# CRLF line endings; confirm before dropping the argument.
usgs = pd.read_csv(
    csv_file,
    sep=',',
    lineterminator='\n',
    dtype={'time': str},
    low_memory=False,
)
Hide code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_11232\3011563604.py:2: DtypeWarning: Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.
  usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})

Data Filtering#

  1. Converting the date columns to datetime

  2. Date > 1960-01-01 and < 2023-01-01

  3. Longitude > -123 and < -113

  4. Latitude > 29 and < 39

Hide code cell source
# Parse the timestamps once and reduce them to calendar days (midnight,
# timezone-naive). Unparseable values become NaT and are removed by the date
# filter below. This replaces a format-to-string / re-parse round trip and a
# row-by-row Python loop that produced the same values.
parsed_time = pd.to_datetime(usgs["time"], errors="coerce")
usgs["time"] = parsed_time.dt.tz_localize(None).dt.normalize()

# Coerce the numeric columns; values that are not numbers become NaN and fail
# every comparison in the region filter.
for numeric_column in ("longitude", "latitude", "mag"):
    usgs[numeric_column] = pd.to_numeric(usgs[numeric_column], errors="coerce")

# Date > 1960-01-01 and < 2023-01-01 (both bounds exclusive).
in_period = (usgs["time"] > pd.Timestamp("1960-01-01")) & (usgs["time"] < pd.Timestamp("2023-01-01"))

# Longitude > -123 and < -113, latitude > 29 and < 39.
in_region = (
    (usgs["longitude"] > -123)
    & (usgs["longitude"] < -113)
    & (usgs["latitude"] < 39)
    & (usgs["latitude"] > 29)
)

# .copy() so later column assignments act on an independent frame.
usgs = usgs[in_period & in_region].copy()
usgs.head()
time latitude longitude depth mag magType nst gap dmin rms ... updated place type horizontalError depthError magError magNst status locationSource magSource\r
240 2022-12-31 33.397500 -116.393333 3.88 4.14 mw 132 16 0.07391 0.19 ... 2023-09-22T21:50:30.029Z 16 km N of Borrego Springs, CA earthquake 0.1 0.38 NaN 6 reviewed ci ci\r
241 2022-12-31 34.355667 -116.921833 4.73 3.47 mw 121 25 0.07845 0.15 ... 2023-03-07T19:00:01.040Z 11km SSE of Lucerne Valley, CA earthquake 0.09 0.41 NaN 4 reviewed ci ci\r
246 2022-12-22 37.620167 -122.025000 3.82 3.34 mw 141 16 NaN 0.16 ... 2023-04-20T04:34:00.806Z 3km N of Union City, CA earthquake 0.1 0.17 NaN 3 reviewed nc nc\r
262 2022-12-17 37.918167 -122.304000 5.48 3.57 mw 170 19 0.01598 0.15 ... 2023-07-27T08:15:34.318Z 1km ENE of El Cerrito, CA earthquake 0.1 0.17 NaN 4 reviewed nc nc\r
263 2022-12-13 36.604667 -121.209333 8.88 3.28 ml 67 55 0.03812 0.09 ... 2023-02-18T22:04:08.040Z 10km NW of Pinnacles, CA earthquake 0.14 0.28 0.129 72 reviewed nc nc\r

5 rows × 22 columns

Data Grouping And Merging#

The data is grouped into 1-day bins. For each day we compute the maximum magnitude and the number of earthquakes (multiplicity), then keep the 10 days with the largest value of each.

# Bucket the catalogue by calendar day; both daily summaries use this grouping.
magnitudes_by_day = usgs.groupby(usgs['time'].dt.to_period('D'))['mag']

# Daily multiplicity: how many events with a magnitude value fall on each day.
usgs_grouped_counts = magnitudes_by_day.count().rename('count').reset_index()
usgs_grouped_counts['time'] = usgs_grouped_counts['time'].dt.to_timestamp()

# Largest magnitude recorded on each day.
usgs_grouped_max = magnitudes_by_day.max().reset_index()
usgs_grouped_max['time'] = usgs_grouped_max['time'].dt.to_timestamp()

# Keep the 10 days with the highest event counts.
usgs_grouped_counts_top10 = usgs_grouped_counts.nlargest(n=10, columns='count')
usgs_grouped_counts_top10.head(10)
time count
10743 2019-07-06 605
9330 2010-04-05 233
6007 1992-06-28 226
3155 1979-10-16 189
6008 1992-06-29 186
7561 1999-10-16 181
6387 1994-01-17 148
4000 1983-05-03 147
1377 1971-02-09 145
3298 1980-05-26 139
# Keep the 10 days with the largest daily maximum magnitude.
usgs_grouped_max_top10 = usgs_grouped_max.nlargest(n=10, columns='mag')
usgs_grouped_max_top10.head(10)
time mag
6007 1992-06-28 7.3
9329 2010-04-04 7.2
7561 1999-10-16 7.1
10743 2019-07-06 7.1
5466 1989-10-18 6.9
3999 1983-05-02 6.7
6387 1994-01-17 6.7
939 1968-04-09 6.6
1377 1971-02-09 6.6
5040 1987-11-24 6.6
# Half-width of the window inspected around each large event.
one_week = dt.timedelta(days=7)

# Put both top-10 tables in chronological order for the plots that follow.
usgs_grouped_counts_top10 = usgs_grouped_counts_top10.sort_values(by='time', ascending=True)
# Sort the magnitude table itself. Previously the counts table was sorted and
# assigned here, which silently replaced the largest-magnitude days with the
# busiest days in every "large earthquake" plot.
usgs_grouped_max_top10 = usgs_grouped_max_top10.sort_values(by='time', ascending=True)

Time Before/After Large Events#

import plotly.express as px
import plotly.graph_objects as go

Time before/after a day of high earthquake multiplicity

Hide code cell source
# Daily event counts for the week on either side of each of the 10 busiest days.
# usgs_grouped_counts is in chronological order (it is built by a groupby on
# day), so a single inclusive window gives the same rows, in the same order,
# as selecting "before" and "after" separately.
window_frames = []
for _, peak_day in usgs_grouped_counts_top10.iterrows():
    current_time = peak_day['time']
    previous_time = current_time - one_week
    next_time = current_time + one_week

    day = usgs_grouped_counts['time']
    window = usgs_grouped_counts[(day >= previous_time) & (day <= next_time)].copy()
    # Positive values are days before the peak, negative values are days after it.
    window['days_until_large_value'] = (current_time - window['time']).dt.days
    window_frames.append(window)

# Concatenate once rather than growing a DataFrame on every iteration.
filtered_data_df = pd.concat(window_frames)

fig = go.Figure()

scatter = go.Scatter(
    x=filtered_data_df['days_until_large_value'],
    y=filtered_data_df['count'],
    mode='markers',
    marker=dict(
        size=6,
        color=filtered_data_df['count'],
        colorscale='Viridis',
        colorbar=dict(title='Number of Earthquakes'),
    ),
    # Hover text: the calendar day each point belongs to.
    text=filtered_data_df['time'],
)

fig.add_trace(scatter)

fig.update_layout(
    title='Top 10 Highest Earthquake Counts, 1 Week Before/After',
    # This is the title that was actually displayed; an earlier
    # 'Days Until Large Value' setting was immediately overwritten.
    xaxis=dict(title='Days Before/After Spike'),
    yaxis=dict(title='Number of Earthquakes'),
    width=900,
    height=600,
    dragmode='pan',
)
fig.update_xaxes(rangeslider_visible=True)

fig.show()

Time before/after a large earthquake

Hide code cell source
# Daily maximum magnitude for the week on either side of each day listed in
# usgs_grouped_max_top10.
# usgs_grouped_max is in chronological order (it is built by a groupby on
# day), so a single inclusive window gives the same rows, in the same order,
# as selecting "before" and "after" separately.
window_frames = []
for _, peak_day in usgs_grouped_max_top10.iterrows():
    current_time = peak_day['time']
    previous_time = current_time - one_week
    next_time = current_time + one_week

    day = usgs_grouped_max['time']
    window = usgs_grouped_max[(day >= previous_time) & (day <= next_time)].copy()
    # Positive values are days before the event, negative values are days after it.
    window['days_until_large_value'] = (current_time - window['time']).dt.days
    window_frames.append(window)

# Concatenate once rather than growing a DataFrame on every iteration.
filtered_data_df = pd.concat(window_frames)

fig = go.Figure()

scatter = go.Scatter(
    x=filtered_data_df['days_until_large_value'],
    y=filtered_data_df['mag'],
    mode='markers',
    marker=dict(
        size=6,
        color=filtered_data_df['mag'],
        colorscale='Viridis',
        colorbar=dict(title='Magnitude'),
    ),
    # Hover text: the calendar day each point belongs to.
    text=filtered_data_df['time'],
)

fig.add_trace(scatter)

fig.update_layout(
    title='Top 10 Largest Earthquakes, 1 Week Before/After',
    xaxis=dict(title='Days Before/After Spike'),
    yaxis=dict(title='Magnitude'),
    width=900,
    height=600,
    dragmode='pan',
)
fig.update_xaxes(rangeslider_visible=True)

fig.show()

Earthquake Locations During Spikes#

Earthquake Locations Before/After a Day With a Large Number of Earthquakes

Hide code cell source
# Locations of every catalogued event in the week on either side of each of
# the 10 busiest days. One scatter trace is drawn per peak day.
window_frames = []
for _, peak_day in usgs_grouped_counts_top10.iterrows():
    current_time = peak_day['time']
    previous_time = current_time - one_week
    next_time = current_time + one_week

    is_before = (usgs['time'] >= previous_time) & (usgs['time'] < current_time)
    is_after = (usgs['time'] >= current_time) & (usgs['time'] <= next_time)
    # "Before" rows first, then "after" rows, each in catalogue order.
    window = pd.concat([usgs[is_before], usgs[is_after]])
    # Positive values are days before the peak, negative values are days after it.
    window['days_until_large_value'] = (current_time - window['time']).dt.days
    # Label used to split the events into one trace per peak day.
    window['time_group'] = str(current_time)
    window_frames.append(window)

# Concatenate once rather than growing a DataFrame on every iteration.
filtered_data_df = pd.concat(window_frames)

fig = go.Figure()

# The window spans one_week on each side, so let the colour range cover all of
# it; a fixed +/-6 range rendered the outermost days with the same colour as
# day 6.
color_scale_max = one_week.days
color_scale_min = -color_scale_max

for time_group in filtered_data_df['time_group'].unique():
    subset_df = filtered_data_df[filtered_data_df['time_group'] == time_group]

    fig.add_trace(go.Scatter(
        x=subset_df['longitude'],
        y=subset_df['latitude'],
        mode='markers',
        marker=dict(
            # Marker diameter scales with magnitude.
            size=subset_df['mag'],
            sizemode='diameter',
            sizeref=0.4,
            color=subset_df['days_until_large_value'],
            symbol='circle',
            # Horizontal colour bar placed below the plot area.
            colorbar=dict(
                tickfont=dict(size=12),
                x=0.5,
                y=-0.2,
                orientation='h',
                len=1.0,
                title='Days +/- Large Event',
            ),
            showscale=True,
            colorscale='Viridis',
            cmin=color_scale_min,
            cmax=color_scale_max,
        ),
        text=subset_df['time'],
        name=str(time_group),
    ))

fig.update_layout(
    width=800,
    height=800,
    title='Earthquake Locations Before/After Large Count Of Earthquakes',
    xaxis=dict(title='Longitude'),
    yaxis=dict(title='Latitude'),
    dragmode='pan',
    legend=dict(
        title='Time Groups',
        font=dict(size=12),
    ),
)

fig.show()

Earthquake Locations Before/After a large earthquake

Hide code cell source
# Locations of every catalogued event in the week on either side of each day
# listed in usgs_grouped_max_top10. One scatter trace is drawn per day.
window_frames = []
for _, peak_day in usgs_grouped_max_top10.iterrows():
    current_time = peak_day['time']
    previous_time = current_time - one_week
    next_time = current_time + one_week

    is_before = (usgs['time'] >= previous_time) & (usgs['time'] < current_time)
    is_after = (usgs['time'] >= current_time) & (usgs['time'] <= next_time)
    # "Before" rows first, then "after" rows, each in catalogue order.
    window = pd.concat([usgs[is_before], usgs[is_after]])
    # Positive values are days before the event, negative values are days after it.
    window['days_until_large_value'] = (current_time - window['time']).dt.days
    # Label used to split the events into one trace per large event.
    window['time_group'] = str(current_time)
    window_frames.append(window)

# Concatenate once rather than growing a DataFrame on every iteration.
filtered_data_df = pd.concat(window_frames)

fig = go.Figure()

# The window spans one_week on each side, so let the colour range cover all of
# it; a fixed +/-6 range rendered the outermost days with the same colour as
# day 6.
color_scale_max = one_week.days
color_scale_min = -color_scale_max

for time_group in filtered_data_df['time_group'].unique():
    subset_df = filtered_data_df[filtered_data_df['time_group'] == time_group]

    fig.add_trace(go.Scatter(
        x=subset_df['longitude'],
        y=subset_df['latitude'],
        mode='markers',
        marker=dict(
            # Marker diameter scales with magnitude.
            size=subset_df['mag'],
            sizemode='diameter',
            sizeref=0.4,
            color=subset_df['days_until_large_value'],
            symbol='circle',
            # Horizontal colour bar placed below the plot area.
            colorbar=dict(
                tickfont=dict(size=12),
                x=0.5,
                y=-0.2,
                orientation='h',
                len=1.0,
                title='Days +/- Large Event',
            ),
            showscale=True,
            colorscale='Viridis',
            cmin=color_scale_min,
            cmax=color_scale_max,
        ),
        text=subset_df['time'],
        name=str(time_group),
    ))

fig.update_layout(
    width=800,
    height=800,
    title='Earthquake Locations Before/After Large Earthquake',
    xaxis=dict(title='Longitude'),
    yaxis=dict(title='Latitude'),
    dragmode='pan',
    legend=dict(
        title='Time Groups',
        font=dict(size=12),
    ),
)

# Show the interactive plot
fig.show()

Energy Calculation And Filtering#

  1. Converting the date columns to datetime

  2. Date > 1960-01-01 and < 2023-01-01

  3. Longitude > -123 and < -113

  4. Latitude > 29 and < 39

  5. Converting the magnitudes to energy through the formula: (1/1.5) * ln(10^(1.5*mag))

# Re-read the catalogue into a separate frame for the energy analysis.
csv_file = "../datasets/All (1960-2023).csv"
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which is what raises the mixed-type DtypeWarning.
usgs_energy = pd.read_csv(
    csv_file,
    sep=',',
    lineterminator='\n',
    dtype={'time': str},
    low_memory=False,
)
# Keep the full timestamp here (no truncation to days); unparseable values
# become NaT.
usgs_energy["time"] = pd.to_datetime(usgs_energy["time"], errors="coerce")
# Magnitudes that are not numbers become NaN.
usgs_energy['mag'] = pd.to_numeric(usgs_energy['mag'], errors='coerce')
Hide code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_11232\317267901.py:2: DtypeWarning:

Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.
Hide code cell source
# Date > 1960-01-01 and < 2023-01-01 (both bounds exclusive).
event_time = usgs_energy['time']
in_period = (event_time > '1960-01-01') & (event_time < '2023-01-01')
usgs_energy = usgs_energy[in_period]

# Coerce the coordinates; values that are not numbers become NaN and fail
# every comparison below.
for coordinate in ('longitude', 'latitude'):
    usgs_energy[coordinate] = pd.to_numeric(usgs_energy[coordinate], errors='coerce')

# Longitude > -123 and < -113, latitude > 29 and < 39.
lon = usgs_energy['longitude']
lat = usgs_energy['latitude']
inside_box = (lon > -123) & (lon < -113) & (lat < 39) & (lat > 29)
usgs_energy = usgs_energy[inside_box]
# Per-event "energy": (1/1.5) * ln(10 ** (1.5 * mag)).
# NOTE(review): algebraically this equals mag * ln(10), i.e. a rescaled
# magnitude — confirm that this is the intended quantity.
formula_constant = 1 / 1.5
raw_energy = 10 ** (1.5 * usgs_energy['mag'])
usgs_energy['energy'] = np.log(raw_energy) * formula_constant
from datetime import datetime
# Normalise `time` to timezone-aware UTC timestamps in one vectorised call.
# Previously every timestamp was formatted to text and re-parsed row by row
# with datetime.strptime, which yields the same instants (to microsecond
# precision) far more slowly.
usgs_energy['time'] = pd.to_datetime(usgs_energy['time'], utc=True)
Hide code cell source
# Sum the energy of events that fall within the same second.
second_of_event = usgs_energy['time'].dt.to_period('S')
energy_per_second = usgs_energy.groupby(second_of_event)['energy'].sum()

# Back to a flat frame with a regular timestamp column.
usgs_grouped_energy = energy_per_second.reset_index()
usgs_grouped_energy['time'] = usgs_grouped_energy['time'].dt.to_timestamp()
Hide code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_11232\3270407175.py:1: UserWarning:

Converting to PeriodArray/Index representation will drop timezone information.
# Preview the first five rows of the per-second energy table.
usgs_grouped_energy.head(n=5)
time energy
0 1960-01-02 22:51:45 9.302444
1 1960-01-05 18:01:47 6.976833
2 1960-01-07 17:51:32 8.381410
3 1960-01-08 06:51:21 7.138014
4 1960-01-11 19:08:39 8.726798

Energy Before/After Large Events#

Hide code cell source
# Per-second energy for the week on either side of each of the 10 busiest days.
# usgs_grouped_energy is in chronological order (it is built by a groupby on
# time), so a single inclusive window gives the same rows, in the same order,
# as selecting "before" and "after" separately.
window_frames = []
for _, peak_day in usgs_grouped_counts_top10.iterrows():
    current_time = peak_day['time']
    previous_time = current_time - one_week
    next_time = current_time + one_week

    stamp = usgs_grouped_energy['time']
    window = usgs_grouped_energy[(stamp >= previous_time) & (stamp <= next_time)].copy()
    # Positive values lie before the peak day's midnight, negative values after it.
    window['time_until_large_value'] = current_time - window['time']
    # The x-axis is titled in days, so also provide the offset as fractional
    # days. NOTE(review): plotly has no dedicated timedelta axis type, so raw
    # Timedelta values would not be shown in day units — confirm rendering.
    window['days_until_large_value'] = window['time_until_large_value'].dt.total_seconds() / 86400
    window_frames.append(window)

# Concatenate once rather than growing a DataFrame on every iteration.
filtered_data_df = pd.concat(window_frames)

fig = go.Figure()

scatter = go.Scatter(
    x=filtered_data_df['days_until_large_value'],
    y=filtered_data_df['energy'],
    mode='markers',
    marker=dict(
        size=6,
        color=filtered_data_df['energy'],
        colorscale='Viridis',
        colorbar=dict(title='Energy'),
    ),
    # Hover text: the timestamp each point belongs to.
    text=filtered_data_df['time'],
)

fig.add_trace(scatter)

fig.update_layout(
    title='Top 10 Largest Earthquake Count Days, 1 Week Before/After (Energy)',
    xaxis=dict(title='Days Before/After Spike'),
    yaxis=dict(title='Energy'),
    width=900,
    height=600,
    dragmode='pan',
)

# Add interactivity
fig.update_xaxes(rangeslider_visible=True)

fig.show()